/*********************************************************************
Replication code for Systemic Discrimination Among Large U.S. Employers
Patrick M. Kline, Evan K. Rose, Christopher R. Walters
April, 2022

This file produces the estimates in Table 5. It requires top-level
directory be set to the replication folder using the global below.
*********************************************************************/

* Replication archive directory
global dir "/accounts/projects/pkline/randres/randres/replication"

*Results storage
set seed 992782
local B=500
matrix T=J(`B',200,.)
local c=1

*Load data
use ${dir}/data/data.dta, clear

*Code characteristics and outcomes
gen B=black
gen Y=cb
bys firm_id: gen firstobs=_n==1

*Keep firms with high enough contact rates
bys firm_id: egen ybar=mean(Y)
keep if ybar>0.03

*Calculate firm-specific coefs and SEs
egen f=group(firm_id)
sum f, detail
local F=r(max)
local f=1
foreach e in reg logit poisson {
		gen c_`e'=.
		gen b_`e'=.
		gen v_cc_`e'=.
		gen v_cb_`e'=.
		gen v_bb_`e'=.
		if "`e'"!="reg" {
			gen v_breg_c`e'=.
			gen v_breg_b`e'=.
		}
}
gen b_test=.
gen v_b_test=.
gen c_test=.
gen v_c_test=.
gen v_cb_test=.
gen v_regc_test=.
gen v_regb_test=.

qui while `f'<=`F' {

	*Estimate linear model
	reg Y B if f==`f', vce(cluster job_id)
	
		*LPM coefs and SEs
		replace c_reg=_b[_cons] if f==`f'
		replace b_reg=_b[B] if f==`f'
		matrix V=e(V)
		replace v_cc_reg=V[2,2] if f==`f'
		replace v_cb_reg=V[1,2] if f==`f'
		replace v_bb_reg=V[1,1] if f==`f'
		
		*Logit coefs and SEs
		replace c_logit=log(c_reg/(1-c_reg)) if f==`f'
		replace b_logit=log((c_reg+b_reg)/(1-(c_reg+b_reg))) - c_logit if f==`f'
		matrix d=J(2,2,.)
		matrix d[2,2]=1/(_b[_cons]*(1-_b[_cons]))
		matrix d[2,1]=(1/((_b[_cons]+_b[B])*(1-(_b[_cons]+_b[B]))))-d[2,2]
		matrix d[1,2]=0
		matrix d[1,1]=d[2,1]+d[2,2]
		matrix V_logit=d'*V*d
		replace v_cc_logit=V_logit[2,2] if f==`f'
		replace v_cb_logit=V_logit[1,2] if f==`f'
		replace v_bb_logit=V_logit[1,1] if f==`f'
		
		*Poisson coefs and SEs
		replace c_poisson=log(_b[_cons]) if f==`f'
		replace b_poisson=log(_b[_cons]+_b[B])-c_poisson if f==`f'
		matrix d=J(2,2,.)
		matrix d[2,2]=1/(_b[_cons])
		matrix d[2,1]=(1/(_b[_cons]+_b[B]))-d[2,2]
		matrix d[1,2]=0
		matrix d[1,1]=d[2,1]+d[2,2]
		matrix V_poisson=d'*V*d
		replace v_cc_poisson=V_poisson[2,2] if f==`f'
		replace v_cb_poisson=V_poisson[1,2] if f==`f'
		replace v_bb_poisson=V_poisson[1,1] if f==`f'
		
		*Covs between LPM and logit
		
			*Reg slope and logit intercept
			matrix d=J(2,1,.)
			matrix d[1,1]=1
			matrix d[2,1]=-(1/(_b[_cons]*(1-_b[_cons])))
			matrix v=d'*V*d
			replace v_breg_clogit=0.5*(v_bb_reg+v_cc_logit-v[1,1]) if f==`f'
			
			*Reg slope and logit slope
			matrix d=J(2,1,.)
			matrix d[1,1]=1-(1/((_b[_cons]+_b[B])*(1-(_b[_cons]+_b[B]))))
			matrix d[2,1]=d[1,1]-1+(1/(_b[_cons]*(1-_b[_cons])))
			matrix v=d'*V*d
			replace v_breg_blogit=0.5*(v_bb_reg+v_bb_logit-v[1,1]) if f==`f'
			
			
		*Covs between LPM and Poisson
		
			*Reg slope and poisson intercept
			matrix d=J(2,1,.)
			matrix d[1,1]=1
			matrix d[2,1]=-1/_b[_cons]
			matrix v=d'*V*d
			replace v_breg_cpoisson=0.5*(v_bb_reg+v_cc_poisson-v[1,1]) if f==`f'
			
			*Reg slope and poisson slope
			matrix d=J(2,1,.)
			matrix d[1,1]=1-(1/(_b[_cons]+_b[B]))
			matrix d[2,1]=d[1,1]-1+(1/_b[_cons])
			matrix v=d'*V*d
			replace v_breg_bpoisson=0.5*(v_bb_reg+v_bb_poisson-v[1,1]) if f==`f'
			
			local ++f
}

matrix popmeans = J(6,1,.)
local r = 1
foreach var of varlist c_reg b_reg c_logit b_logit c_poisson b_poisson {
	qui su `var'
	matrix popmeans[`r',1] = r(mean)
	local ++r
} 


***Compute full sample estimates
preserve
keep if firstobs==1
foreach e in reg logit poisson {

	*First four uncentered, centered, and standardized moments
	foreach p in c b {
	
		gen m1_`p'_`e'=`p'_`e'
		gen m2_`p'_`e'=(`p'_`e'^2) - (v_`p'`p'_`e')
		gen m3_`p'_`e'=(`p'_`e'^3)-(3*`p'_`e'*(v_`p'`p'_`e'))
		gen m4_`p'_`e'=(`p'_`e'^4)-(6*(`p'_`e'^2)*(v_`p'`p'_`e'))+(3*(v_`p'`p'_`e'^2))
		foreach m of numlist 1/4 {
				qui sum m`m'_`p'_`e'
				local m`m'_`p'_`e'=r(mean)
		}
		gen mu1_`p'_`e' = 0
		gen mu2_`p'_`e' = `m2_`p'_`e'' - ((`m1_`p'_`e'')^2)
		gen mu3_`p'_`e' = `m3_`p'_`e'' - (3*(`m2_`p'_`e'')*(`m1_`p'_`e'')) + (2*((`m1_`p'_`e'')^3))
		gen mu4_`p'_`e' = `m4_`p'_`e'' -(4*(`m1_`p'_`e'')*(`m3_`p'_`e''))+(6*(`m2_`p'_`e'')*((`m1_`p'_`e'')^2)-(3*((`m1_`p'_`e'')^4)))
		foreach m of numlist 1/4 {
			gen c`m'_`p'_`e'=mu`m'_`p'_`e'/(mu2_`p'_`e'^(`m'/2))
		}
		gen sigma_`p'_`e'=sqrt(mu2_`p'_`e')
	}
	
	*Covariance and correlation between intercept and slope for this model
	gen cov_`e'=(c_`e'-`m1_c_`e'')*(b_`e'-`m1_b_`e'')-v_cb_`e'
	sum cov_`e'
	local cov_`e'=r(mean)
	gen corr_`e'=`cov_`e''/sqrt(mu2_c_`e'*mu2_b_`e')

}

*Covariances and correlations for logit/poisson intercept and slope with regression slope and intercept
foreach e in logit poisson {
	foreach p in c b {
			gen cov_`p'`e'_breg = (`p'_`e'-`m1_`p'_`e'')*(b_reg - `m1_b_reg') - v_breg_`p'`e'
			sum cov_`p'`e'_breg
			gen corr_`p'`e'_breg = r(mean)/sqrt(mu2_`p'_`e'*mu2_b_reg)
	}
}

*Display and store full sample estimates
mat allres = J(100,2,.)
local list ""
local r = 1
foreach x of varlist m1* m2* m3* m4* mu1* mu2* mu3* mu4* c1* c2* c3* c4* sigma_* cov_* corr_* {

	qui sum `x'
	local t=r(mean)
	disp "Full sample: `x'=`t'"
	matrix allres[`r',1] = `t'
	local list "`list' `x'"
	local `x'=r(mean)
	local ++r
}
restore


*Bootstrap for SEs on variance components
local b=1
while `b'<=`B' {

	qui {
	
		preserve

		*Draw bootstrap weights for this iteration
		gen w = - log(1 - uniform())
		bys job_id: replace w=w[1]
		
		*Calculate firm-specific estimates in this iteration
		drop c* b* v*
		local f=1
		foreach e in reg logit poisson {
				gen c_`e'=.
				gen b_`e'=.
				gen v_cc_`e'=.
				gen v_cb_`e'=.
				gen v_bb_`e'=.
				if "`e'"!="reg" {
					gen v_breg_c`e'=.
					gen v_breg_b`e'=.
				}
		}
		qui while `f'<=`F' {
		
		*Estimate linear model
		reg Y B if f==`f' [aw=w], vce(cluster job_id)
	
		*LPM coefs and SEs
		replace c_reg=_b[_cons] if f==`f'
		replace b_reg=_b[B] if f==`f'
		matrix V=e(V)
		replace v_cc_reg=V[2,2] if f==`f'
		replace v_cb_reg=V[1,2] if f==`f'
		replace v_bb_reg=V[1,1] if f==`f'
		
		*Logit coefs and SEs
		replace c_logit=log(c_reg/(1-c_reg)) if f==`f'
		replace b_logit=log((c_reg+b_reg)/(1-(c_reg+b_reg))) - c_logit if f==`f'
		matrix d=J(2,2,.)
		matrix d[2,2]=1/(_b[_cons]*(1-_b[_cons]))
		matrix d[2,1]=(1/((_b[_cons]+_b[B])*(1-(_b[_cons]+_b[B]))))-d[2,2]
		matrix d[1,2]=0
		matrix d[1,1]=d[2,1]+d[2,2]
		matrix V_logit=d'*V*d
		replace v_cc_logit=V_logit[2,2] if f==`f'
		replace v_cb_logit=V_logit[1,2] if f==`f'
		replace v_bb_logit=V_logit[1,1] if f==`f'
		
		*Poisson coefs and SEs
		replace c_poisson=log(_b[_cons]) if f==`f'
		replace b_poisson=log(_b[_cons]+_b[B])-c_poisson if f==`f'
		matrix d=J(2,2,.)
		matrix d[2,2]=1/(_b[_cons])
		matrix d[2,1]=(1/(_b[_cons]+_b[B]))-d[2,2]
		matrix d[1,2]=0
		matrix d[1,1]=d[2,1]+d[2,2]
		matrix V_poisson=d'*V*d
		replace v_cc_poisson=V_poisson[2,2] if f==`f'
		replace v_cb_poisson=V_poisson[1,2] if f==`f'
		replace v_bb_poisson=V_poisson[1,1] if f==`f'
		
		*Covs between LPM and logit
		
			*Reg slope and logit intercept
			matrix d=J(2,1,.)
			matrix d[1,1]=1
			matrix d[2,1]=-(1/(_b[_cons]*(1-_b[_cons])))
			matrix v=d'*V*d
			replace v_breg_clogit=0.5*(v_bb_reg+v_cc_logit-v[1,1]) if f==`f'
			
			*Reg slope and logit slope
			matrix d=J(2,1,.)
			matrix d[1,1]=1-(1/((_b[_cons]+_b[B])*(1-(_b[_cons]+_b[B]))))
			matrix d[2,1]=d[1,1]-1+(1/(_b[_cons]*(1-_b[_cons])))
			matrix v=d'*V*d
			replace v_breg_blogit=0.5*(v_bb_reg+v_bb_logit-v[1,1]) if f==`f'
			
			
		*Covs between LPM and Poisson
		
			*Reg slope and poisson intercept
			matrix d=J(2,1,.)
			matrix d[1,1]=1
			matrix d[2,1]=-1/_b[_cons]
			matrix v=d'*V*d
			replace v_breg_cpoisson=0.5*(v_bb_reg+v_cc_poisson-v[1,1]) if f==`f'
			
			*Reg slope and poisson slope
			matrix d=J(2,1,.)
			matrix d[1,1]=1-(1/(_b[_cons]+_b[B]))
			matrix d[2,1]=d[1,1]-1+(1/_b[_cons])
			matrix v=d'*V*d
			replace v_breg_bpoisson=0.5*(v_bb_reg+v_bb_poisson-v[1,1]) if f==`f'
			
			local ++f
			
		}
		
		*Store estimated parameters
		keep if firstobs==1
		foreach e in reg logit poisson {

			*First four uncentered, centered, and standardized moments
			foreach p in c b {
			
				gen m1_`p'_`e'=`p'_`e'
				gen m2_`p'_`e'=(`p'_`e'^2) - (v_`p'`p'_`e')
				gen m3_`p'_`e'=(`p'_`e'^3)-(3*`p'_`e'*(v_`p'`p'_`e'))
				gen m4_`p'_`e'=(`p'_`e'^4)-(6*(`p'_`e'^2)*(v_`p'`p'_`e'))+(3*(v_`p'`p'_`e'^2))
				foreach m of numlist 1/4 {
						qui sum m`m'_`p'_`e'
						local m`m'_`p'_`e'=r(mean)
				}
				gen mu1_`p'_`e' = 0
				gen mu2_`p'_`e' = `m2_`p'_`e'' - ((`m1_`p'_`e'')^2)
				gen mu3_`p'_`e' = `m3_`p'_`e'' - (3*(`m2_`p'_`e'')*(`m1_`p'_`e'')) + (2*((`m1_`p'_`e'')^3))
				gen mu4_`p'_`e' = `m4_`p'_`e'' -(4*(`m1_`p'_`e'')*(`m3_`p'_`e''))+(6*(`m2_`p'_`e'')*((`m1_`p'_`e'')^2)-(3*((`m1_`p'_`e'')^4)))
				foreach m of numlist 1/4 {
					gen c`m'_`p'_`e'=mu`m'_`p'_`e'/(mu2_`p'_`e'^(`m'/2))
				}
				gen sigma_`p'_`e'=sqrt(mu2_`p'_`e')
			}
			
			*Covariance and correlation between intercept and slope for this model
			gen cov_`e'=(c_`e'-`m1_c_`e'')*(b_`e'-`m1_b_`e'')-v_cb_`e'
			sum cov_`e'
			local cov_`e'=r(mean)
			gen corr_`e'=`cov_`e''/sqrt(mu2_c_`e'*mu2_b_`e')

		}
		
		*Covariances and correlations for logit/poisson intercept and slope with regression slope and intercept
		foreach e in logit poisson {
			foreach p in c b {
					gen cov_`p'`e'_breg = (`p'_`e'-`m1_`p'_`e'')*(b_reg - `m1_b_reg') - v_breg_`p'`e'
					sum cov_`p'`e'_breg
					gen corr_`p'`e'_breg = r(mean)/sqrt(mu2_`p'_`e'*mu2_b_reg)
			}
		}

		*Store estimates
		foreach x of varlist m1* m2* m3* m4* mu1* mu2* mu3* mu4* c1* c2* c3* c4* sigma_* cov_* corr_* {
		
			qui sum `x'
			matrix T[`b',`c']=r(mean)
			local ++c
		
		}

		restore

	}
	
	local prog=`b'/`B'
	disp "BS progress: `prog'"
	local c=1
	local ++b
}

****Show results
clear
svmat T
local t=1
foreach x in `list' {
	qui sum T`t', detail
	local sd=r(sd)
	disp "Parameter `x': est=``x'', se=`sd'"
	matrix allres[`t',2] = `sd'
	local ++t
}
clear
svmat allres
rename allres1 est
rename allres2 sd
gen param = ""
local t=1
foreach x in `list' {
	replace param = "`x'" if _n == `t'
	local ++t
}

outsheet using ${dir}/dump/table5.csv, comma replace

